Basic codes for Computer Vision¶

To load the image¶

In [1]:
import cv2
import matplotlib.pyplot as plt
image = cv2.imread("frog.jpg")
plt.imshow(image)
Out[1]:
<matplotlib.image.AxesImage at 0x20dc929e170>
No description has been provided for this image

To get the image in original colours¶

This code converts an image from BGR (OpenCV default) to RGB (Matplotlib standard) so colors display correctly, then shows it with a title and grid lines using axis ticks.

In [2]:
image_rgb = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
plt.grid(True)
plt.title("REAL FROG")
plt.imshow(image_rgb)
Out[2]:
<matplotlib.image.AxesImage at 0x20dca977be0>
No description has been provided for this image

This code prints the image’s height, width, and number of color channels

In [3]:
print("Image height: ",image.shape[0])
print("Image Width: ",image.shape[1])
print("Image Channels: ",image.shape[2])
Image height:  408
Image Width:  612
Image Channels:  3

To convert it into Gray¶

This code converts the image to grayscale and displays it with a title using a gray colormap.

In [4]:
image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
plt.title("Gray image of frog.jpg")
plt.imshow(image_gray, cmap="gray")
Out[4]:
<matplotlib.image.AxesImage at 0x20dca9eb9a0>
No description has been provided for this image

To save an converted image¶

In [5]:
cv2.imwrite("frog_copy.jpg",image_gray)
Out[5]:
True

For Gray Level Reduction¶

This code reduces the number of gray levels in the image, so instead of smooth shades, you get fewer shades (like pure black and white when level = 1).

In [6]:
import numpy as np
level = 1
quantized = np.floor(image_gray/(level/256)*level/256)
quantized = quantized.astype("uint8")
plt.imshow(quantized,cmap="gray")
Out[6]:
<matplotlib.image.AxesImage at 0x20dcaaa1a20>
No description has been provided for this image

Splitting and Displaying RGB Color Channels¶

This code extracts the Blue, Green, and Red channels from the RGB image separately and shows them side by side in grayscale.

In [7]:
img = cv2.imread("mandril_color.tif")
rgb_img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
blue_channel = img[:, :, 0]
green_channel = img[:, :, 1]
red_channel = img[:, :, 2]
plt.figure(figsize=(12,8))
plt.subplot(2, 2, 1)
plt.imshow(rgb_img)
plt.title("Original Picture")
plt.subplot(2, 2, 2)
plt.imshow(blue_channel, cmap='gray')
plt.title('Blue Channel')
plt.subplot(2, 2, 3)
plt.imshow(green_channel, cmap='gray')
plt.title('Green Channel')
plt.subplot(2, 2, 4)
plt.imshow(red_channel, cmap='gray')
plt.title('Red Channel')
plt.show()
No description has been provided for this image

This code shows how variable assignment (A = image) shares the same data (same ID), while .copy() creates a new separate image in memory with a different ID.

In [8]:
image = cv2.imread("mandril_color.tif")
A= image
B= image.copy()
print("Orginal ID: ",id(image))
print("ID of A image: ",id(A))
print("ID of B image: ",id(B))
Orginal ID:  2258260235088
ID of A image:  2258260235088
ID of B image:  2258225538864

Image Flipping and Multiple Rotations¶

This code shows the original image, flips it vertically and horizontally, then rotates it 90° clockwise and 90° counterclockwise — useful for understanding different transformations.

In [9]:
pic = cv2.imread("cat.jpg")
pic_rgb = cv2.cvtColor(pic,cv2.COLOR_BGR2RGB)
plt.figure(figsize=(10,6))
plt.subplot(2,3,1)
plt.imshow(pic_rgb)
plt.subplot(2,3,2)
flip_ver = cv2.flip(pic,0)
plt.imshow(cv2.cvtColor(flip_ver,cv2.COLOR_BGR2RGB))
plt.subplot(2,3,3)
flip_hor= cv2.flip(pic,1)
plt.imshow(cv2.cvtColor(flip_hor,cv2.COLOR_BGR2RGB))
plt.subplot(2,3,4)
cat_rotate = cv2.rotate(pic,cv2.ROTATE_90_CLOCKWISE)
plt.imshow(cv2.cvtColor(cat_rotate,cv2.COLOR_BGR2RGB))
plt.subplot(2,3,5)
cat_rotate2 = cv2.rotate(pic,cv2.ROTATE_90_COUNTERCLOCKWISE)
plt.imshow(cv2.cvtColor(cat_rotate2,cv2.COLOR_BGR2RGB))
Out[9]:
<matplotlib.image.AxesImage at 0x20dcaf33df0>
No description has been provided for this image

To Crop¶

This code loads an image, then crops a specific region by selecting rows 35–110 and columns 60–200, and shows the original and cropped parts side by side.
image[rows, columns] = image[height, width] = image[startY:endY, startX:endX] (all mean the same for cropping).

In [10]:
picture = cv2.imread("puppies.jpeg")
crop = picture[35:110,60:200]
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.imshow(cv2.cvtColor(picture,cv2.COLOR_BGR2RGB))
plt.subplot(1,2,2)
plt.imshow(cv2.cvtColor(crop,cv2.COLOR_BGR2RGB))
Out[10]:
<matplotlib.image.AxesImage at 0x20dcaf8cbe0>
No description has been provided for this image

Changing Specific Image Pixels¶

Create blank image: blank = np.zeros((height, width, 3), dtype=np.uint8)

Draw rectangle: cv2.rectangle(blank, (x1,y1), (x2,y2), (B,G,R), thickness)

Fill region: blank[startY:endY, startX:endX] = [B,G,R]

Draw circle: cv2.circle(blank, (centerX, centerY), radius, (B,G,R), thickness)

Draw line: cv2.line(blank, (x1,y1), (x2,y2), (B,G,R), thickness)

Draw polygon outline: cv2.polylines(blank, [points], isClosed, (B,G,R), thickness)

Draw filled polygon: cv2.fillPoly(blank, [points], (B,G,R))

In [11]:
blank = np.zeros((700,700,3),dtype=np.uint8)
plt.imshow(blank)
Out[11]:
<matplotlib.image.AxesImage at 0x20dcb074550>
No description has been provided for this image
In [12]:
cv2.rectangle(blank, (0, 100), (400, 500), (0, 255, 0), -1)
plt.imshow(cv2.cvtColor(blank, cv2.COLOR_BGR2RGB))
Out[12]:
<matplotlib.image.AxesImage at 0x20dccb85b40>
No description has been provided for this image
In [13]:
blank[0:100,:] = [0,0,255]
plt.imshow(cv2.cvtColor(blank, cv2.COLOR_BGR2RGB))
Out[13]:
<matplotlib.image.AxesImage at 0x20dccbf6b00>
No description has been provided for this image
In [14]:
blank[100:700,400:700] = [255,0,0]
plt.imshow(cv2.cvtColor(blank,cv2.COLOR_BGR2RGB))
Out[14]:
<matplotlib.image.AxesImage at 0x20dccc71b10>
No description has been provided for this image
In [15]:
cv2.circle(blank, (200, 600), 100, (0, 0, 255), -1)  
plt.imshow(cv2.cvtColor(blank, cv2.COLOR_BGR2RGB))
Out[15]:
<matplotlib.image.AxesImage at 0x20dcccf93c0>
No description has been provided for this image

To write Text¶

cv2.putText(image, 'Your Text', (x, y), font, fontScale, (B, G, R), thickness)

In [16]:
image = cv2.imread("nature.jpg")
cv2.putText(image, 'Nature is Beautiful.', (0, 200), cv2.FONT_HERSHEY_SCRIPT_COMPLEX, 2, (0, 255, 255), 3)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
Out[16]:
<matplotlib.image.AxesImage at 0x20dce0a82e0>
No description has been provided for this image

Image Histogram¶

An image histogram shows how pixel intensities are distributed in an image, it plots how many pixels have each brightness value from black (0) to white (255). This helps analyze contrast, brightness, and overall exposure

Calculate the pixel intensity histogram of the image¶

hist = cv2.calcHist([image], [channel], None, [numBins], [range])

Plot the histogram as a line graph for smooth curve¶

plt.plot(hist)

Make X-axis values for bar plot (0 to numBins)¶

x = range(numBins)

Plot the histogram as a bar chart for clear bin visualization¶

plt.bar(x, hist[:, 0], width=1.0, color='blue')

In [17]:
img = cv2.imread("nature.jpg",cv2.IMREAD_GRAYSCALE)
plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
plt.imshow(img,cmap="gray")
plt.subplot(1,2,2)
hist = cv2.calcHist([img],[0],None,[256],[0,256])
plt.plot(hist)
x = range(256)
plt.bar(x, hist[:,0], width=1.0, color='yellow') 
plt.show()
No description has been provided for this image

Image Negative Transformation¶

Create negative image by inverting pixel values :negative = 255 - b¶

An image negative is created by inverting all the pixel intensities, bright areas become dark and dark areas become bright. This is commonly used in photography and image processing to highlight details that may be hard to see in the original image

In [18]:
a = cv2.imread("bird.jpeg")
b = cv2.imread("bird.jpeg",cv2.IMREAD_GRAYSCALE)
negative = 255 - b
plt.figure(figsize=(8,4))
plt.subplot(2,2,1)
plt.imshow(cv2.cvtColor(a,cv2.COLOR_BGR2RGB))
plt.subplot(2,2,3)
plt.imshow(b,cmap="gray")
plt.subplot(2,2,4)
plt.imshow(negative,cmap="gray")
Out[18]:
<matplotlib.image.AxesImage at 0x20dccb9fc70>
No description has been provided for this image

Brightness and Contrast Adjustment¶

Brightness and contrast adjustment changes how light or dark an image appears and how strong the difference is between dark and bright areas. It’s used to improve visibility, highlight details, or prepare images for further processing. Use alpha to control contrast (>1 = more contrast) and beta to control brightness (positive = brighter). cv2.convertScaleAbs() safely applies both and keeps pixel values valid.

In [19]:
x = cv2.imread("eye.jpeg")
y = cv2.imread("eye.jpeg",cv2.IMREAD_GRAYSCALE)
a = 1.2
b = 50
adjusted_colour = cv2.convertScaleAbs(x, alpha=a, beta=b)
adjusted_gray = cv2.convertScaleAbs(y, alpha=a, beta=b)
plt.figure(figsize=(10,5))
plt.subplot(2,2,1)
plt.imshow(cv2.cvtColor(x,cv2.COLOR_BGR2RGB))
plt.subplot(2,2,2)
plt.imshow(cv2.cvtColor(adjusted_colour,cv2.COLOR_BGR2RGB))
plt.subplot(2,2,3)
plt.imshow(y, cmap="gray")
plt.subplot(2,2,4)
plt.imshow(adjusted_gray,cmap="gray")
Out[19]:
<matplotlib.image.AxesImage at 0x20dcae98be0>
No description has been provided for this image

Histogram Equalization¶

Histogram equalization improves the contrast of an image by spreading out pixel intensity values more evenly across the full range. This makes dark areas brighter and bright areas clearer, revealing hidden details. cv2.equalizeHist() spreads pixel values across the full range, original image stays dull, equalized image pops with clear contrast. plt.hist(image.ravel(), numBins, [min, max]) it shows how pixel intensities are distributed in an image.

In [20]:
import cv2
import matplotlib.pyplot as plt
B = cv2.imread("elephant.jpeg", cv2.IMREAD_GRAYSCALE)
adjust = cv2.equalizeHist(B)
plt.figure(figsize=(10, 5))
plt.subplot(2, 2, 1)
plt.imshow(B, cmap="gray")
plt.title("Original Image")
plt.subplot(2, 2, 2)
plt.imshow(adjust, cmap="gray")
plt.title("Equalized Image")
plt.subplot(2, 2, 3)
plt.hist(B.ravel(), 256, [0, 256])
plt.title("Original Histogram")
plt.subplot(2, 2, 4)
plt.hist(adjust.ravel(), 256, [0, 256])
plt.title("Equalized Histogram")
plt.tight_layout()
plt.show()
C:\Users\ttahz\AppData\Local\Temp\ipykernel_2248\3309220573.py:13: MatplotlibDeprecationWarning: Passing the range parameter of hist() positionally is deprecated since Matplotlib 3.9; the parameter will become keyword-only in 3.11.
  plt.hist(B.ravel(), 256, [0, 256])
C:\Users\ttahz\AppData\Local\Temp\ipykernel_2248\3309220573.py:16: MatplotlibDeprecationWarning: Passing the range parameter of hist() positionally is deprecated since Matplotlib 3.9; the parameter will become keyword-only in 3.11.
  plt.hist(adjust.ravel(), 256, [0, 256])
No description has been provided for this image

Image Thresholding¶

Thresholding converts a grayscale image to a binary image, pixels above a threshold become white (1), pixels below become black (0). It’s used for separating objects from the background.

ret, thresh_img = cv2.threshold(image, threshold_value, max_value, method)

In [21]:
o = cv2.imread("panda.jpeg",cv2.IMREAD_GRAYSCALE)
ret, thresh_img = cv2.threshold(o, 127 , 255 , cv2.THRESH_BINARY)
plt.subplot(1,2,1)
plt.imshow(o, cmap="gray")
plt.title("Original")
plt.subplot(1,2,2)
plt.imshow(thresh_img, cmap="gray")
plt.title("Thresholded")
plt.show()
No description has been provided for this image

Otsu’s Thresholding automatically calculates the best threshold value to separate foreground and background in a grayscale image, making it easier than picking a threshold manually. Use cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) to apply it in one step.

Image Resizing¶

Image resizing changes an image’s width and height by scaling it up or down. This is useful for standardizing image sizes, zooming, or fitting images into specific layouts in computer vision tasks.

This code loads an image and resizes it in two ways: the first scales width by 2 (fx=2) and keeps height same (fy=1); the second keeps width same (fx=1) but doubles the height (fy=2). Both use cv2.INTER_CUBIC for smoother results. The resized and original images are shown side by side for comparison.

cv2.resize(image, None, fx, fy, interpolation) → fx = width scale, fy = height scale.

In [22]:
import cv2
import matplotlib.pyplot as plt
image = cv2.imread("cat.jpg")
resized = cv2.resize(image,None, fx=2, fy=1, interpolation=cv2.INTER_CUBIC)
resized2 = cv2.resize(image,None, fx=1, fy=2, interpolation=cv2.INTER_CUBIC)
plt.figure(figsize=(9,3))
plt.subplot(1,3,1)
plt.imshow(cv2.cvtColor(resized,cv2.COLOR_BGR2RGB))
plt.subplot(1,3,2)
plt.imshow(cv2.cvtColor(resized2,cv2.COLOR_BGR2RGB))
plt.subplot(1,3,3)
plt.imshow(cv2.cvtColor(image,cv2.COLOR_BGR2RGB))
Out[22]:
<matplotlib.image.AxesImage at 0x20dcaaf5150>
No description has been provided for this image

Image Translation (Shifting)¶

Image translation shifts an image’s position horizontally or vertically without changing its shape or size. It’s useful for moving objects in an image or for alignment tasks in image processing. This code shifts the image 50 pixels left and 200 pixels down using an affine transformation matrix M. cv2.warpAffine() applies this shift, and the original and shifted images are shown side by side. Translation matrix M = [[1, 0, shiftX], [0, 1, shiftY]] shifts image by (X, Y).

rows, cols = original.shape[:2] gets the image’s height (rows) and width (cols) from its shape, which is needed to tell OpenCV how big the output should be. M = np.float32([[1, 0, -50], [0, 1, 200]]) creates a 2×3 translation matrix in float32 format, the 1 and 0 keep the scale and rotation unchanged, while -50 shifts the image 50 pixels left (x-direction) and 200 shifts it 200 pixels down (y-direction). shifted = cv2.warpAffine(original, M, (rows, cols)) applies this matrix to the image to create a new, shifted version with the same size as the original.

In [23]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
original = cv2.imread("frog_copy.jpg")
rows, cols = original.shape[:2]
M = np.float32([[1,0,-50],[0,1,200]])
shifted = cv2.warpAffine(original, M, (rows,cols))
plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
plt.imshow(original,cmap="gray")
plt.subplot(1,2,2)
plt.imshow(shifted,cmap="gray")
Out[23]:
<matplotlib.image.AxesImage at 0x20dce6c9000>
No description has been provided for this image

Image Rotation¶

Rotation turns an image around a defined point (usually its center) by a specific angle. This is useful for correcting orientation, data augmentation, or aligning objects in computer vision tasks.

The code loads the image and gets its height and width with shape[:2]. The center is set to the middle of the image so it rotates around its center point. cv2.getRotationMatrix2D(center, 180, 1) creates a rotation matrix to rotate the image 180 degrees with no scaling. cv2.warpAffine applies this matrix to produce the rotated image, which is then displayed next to the original for comparison.

In [24]:
org = cv2.imread("elephant.jpeg")
(h,w) = org.shape[:2]
center = (w // 2 , h //2 )
M = cv2.getRotationMatrix2D(center, 180, 1)
rotated = cv2.warpAffine(org, M, (w,h))
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.imshow(cv2.cvtColor(org,cv2.COLOR_BGR2RGB))
plt.subplot(1,2,2)
plt.imshow(cv2.cvtColor(rotated,cv2.COLOR_BGR2RGB))
Out[24]:
<matplotlib.image.AxesImage at 0x20dcaca8df0>
No description has been provided for this image

Applying Custom Kernel Filtering Using cv2.filter2D¶

In image processing, filtering is used to enhance or detect features in an image by convolving it with a kernel (filter matrix). The cv2.filter2D function allows applying any custom kernel to an image, enabling effects like blurring, sharpening, edge detection, etc. Here, a simple averaging kernel is used to blur the image by smoothing pixel values.

kernal = np.ones((5,5), np.float32) / 30 Creates a 5x5 kernel filled with values summing approximately to 1/30 each, used to average nearby pixel values and blur the image. filterd = cv2.filter2D(N, -1, kernal) Applies the custom kernel to the image N using convolution; -1 keeps the output image depth the same as the input.

In [25]:
N = cv2.imread("norris.webp")
kernal = np.ones((5,5), np.float32) / 30
filterd = cv2.filter2D(N, -1, kernal)
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.imshow(cv2.cvtColor(N, cv2.COLOR_BGR2RGB))
plt.subplot(1,2,2)
plt.imshow(cv2.cvtColor(filterd, cv2.COLOR_BGR2RGB))
Out[25]:
<matplotlib.image.AxesImage at 0x20dcec7c2e0>
No description has been provided for this image

Image Sharpening Using a Custom KerneL¶

Sharpening enhances the edges and fine details in an image by emphasizing differences in neighboring pixel intensities. This is done by applying a sharpening kernel that highlights high-frequency components. kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], np.float32): define the sharpening kernel matrix

In [26]:
N = cv2.imread("norris.webp")
kernal = np.array([[0,-1,0],[-1,5,-1],[0,-1,0]],np.float32)
sharpered = cv2.filter2D(N, -1, kernal)
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.imshow(cv2.cvtColor(N, cv2.COLOR_BGR2RGB))
plt.subplot(1,2,2)
plt.imshow(cv2.cvtColor(sharpered, cv2.COLOR_BGR2RGB))
Out[26]:
<matplotlib.image.AxesImage at 0x20dceccd5d0>
No description has been provided for this image

Sobel Edge Detection¶

Sobel operator is used to detect edges by calculating image intensity gradients in the horizontal (x) and vertical (y) directions. This helps highlight edges where intensity changes sharply. The cv2.Sobel() function calculates the gradient of the image intensity in a specific direction, horizontal when (1, 0) and vertical when (0, 1). Using cv2.CV_64F as the output depth allows negative gradient values to be captured without clipping. Since gradients can have negative and large values, cv2.convertScaleAbs() converts them to absolute 8-bit values suitable for display. Finally, cv2.addWeighted() combines the horizontal and vertical gradients with equal weights to produce a balanced edge image highlighting edges in all directions.

In [27]:
img = cv2.imread("woman_blonde.tif")
grad_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)
grad_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)
abs_grad_x = cv2.convertScaleAbs(grad_x)
abs_grad_y = cv2.convertScaleAbs(grad_y)
edge_image = cv2.addWeighted(abs_grad_x, 0.5, abs_grad_y, 0.5, 0)
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.imshow(img , cmap= "gray")
plt.subplot(1,2,2)
plt.imshow(edge_image , cmap= "gray")
Out[27]:
<matplotlib.image.AxesImage at 0x20dce5457e0>
No description has been provided for this image

Image Noise Reduction Using Median and Gaussian Blur Filters¶

Median blur is a non-linear filter that replaces each pixel with the median value from its neighborhood, effectively removing salt-and-pepper noise without blurring edges. Gaussian blur, on the other hand, is a linear filter that applies a weighted average based on a Gaussian distribution, producing a smooth and natural blur that reduces random noise and fine details.

Median bur is applied using cv2.medianBlur(img, ksize), where ksize is an odd positive integer like 3, 5, or 7, defining the neighborhood for median calculation to remove impulse noise. Gaussian blur uses cv2.GaussianBlur(img, (kx, ky), sigmaX), with (kx, ky) as an odd-sized kernel (e.g., (3,3), (5,5)) and sigmaX as the standard deviation; setting sigmaX to 0 lets OpenCV compute it automatically. These parameters control the strength of noise reduction and smoothing.

In [28]:
img = cv2.imread("lena_gray_512.tif")
median = cv2.medianBlur(img, 7)
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.subplot(1,2,2)
plt.imshow(cv2.cvtColor(median, cv2.COLOR_BGR2RGB))
Out[28]:
<matplotlib.image.AxesImage at 0x20dce546d70>
No description has been provided for this image
In [29]:
img = cv2.imread("lena_gray_256.tif")
blurred = cv2.GaussianBlur(img, (7, 7), 0)
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.subplot(1,2,2)
plt.imshow(cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB))
Out[29]:
<matplotlib.image.AxesImage at 0x20dcea2ac50>
No description has been provided for this image

Basic ML Algorithms for image classification¶

Logistic Regression¶

In [30]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
In [31]:
class plot_error_surfaces(object):
    def __init__(self, w_range, b_range, X, Y, n_samples=30, go=True):
        W = np.linspace(-w_range, w_range, n_samples)
        B = np.linspace(-b_range, b_range, n_samples)
        w, b = np.meshgrid(W, B)
        Z = np.zeros((30, 30))
        count1 = 0
        self.y = Y.numpy()
        self.x = X.numpy()
        for w1, b1 in zip(w, b):
            count2 = 0
            for w2, b2 in zip(w1, b1):
                yhat = 1 / (1 + np.exp(-1 * (w2 * self.x + b2)))
                Z[count1, count2] = -1 * np.mean(self.y * np.log(yhat + 1e-16) + (1 - self.y) * np.log(1 - yhat + 1e-16))
                count2 += 1
            count1 += 1
        self.Z = Z
        self.w = w
        self.b = b
        self.W = []
        self.B = []
        self.LOSS = []
        self.n = 0
        if go == True:
            plt.figure()
            plt.figure(figsize=(7.5, 5))
            plt.axes(projection='3d').plot_surface(self.w, self.b, self.Z, rstride=1, cstride=1, cmap='viridis', edgecolor='none')
            plt.title('Loss Surface')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.show()
            plt.figure()
            plt.title('Loss Surface Contour')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.contour(self.w, self.b, self.Z)
            plt.show()
    def set_para_loss(self, model, loss):
        self.n += 1
        self.W.append(list(model.parameters())[0].item())
        self.B.append(list(model.parameters())[1].item())
        self.LOSS.append(loss)
    def final_plot(self):
        ax = plt.axes(projection='3d')
        ax.plot_wireframe(self.w, self.b, self.Z)
        ax.scatter(self.W, self.B, self.LOSS, c='r', marker='x', s=200, alpha=1)
        plt.figure()
        plt.contour(self.w, self.b, self.Z)
        plt.scatter(self.W, self.B, c='r', marker='x')
        plt.xlabel('w')
        plt.ylabel('b')
        plt.show()
    def plot_ps(self):
        plt.subplot(121)
        plt.plot(self.x[self.y == 0], self.y[self.y == 0], 'ro')
        plt.plot(self.x[self.y == 1], self.y[self.y == 1] - 1, 'o')
        plt.plot(self.x, self.W[-1] * self.x + self.B[-1])
        plt.xlabel('x')
        plt.ylabel('y')
        plt.ylim((-0.1, 2))
        plt.title('Data Space Iteration: ' + str(self.n))
        plt.show()
        plt.subplot(122)
        plt.contour(self.w, self.b, self.Z)
        plt.scatter(self.W, self.B, c='r', marker='x')
        plt.title('Loss Surface Contour Iteration' + str(self.n))
        plt.xlabel('w')
        plt.ylabel('b')
In [32]:
def PlotStuff(X, Y, model, epoch, leg=True):
    plt.plot(X.numpy(), model(X).detach().numpy(), label=('epoch ' + str(epoch)))
    plt.plot(X.numpy(), Y.numpy(), 'r')
    if leg:
        plt.legend()
In [33]:
torch.manual_seed(0)
Out[33]:
<torch._C.Generator at 0x20dce7ef110>
In [34]:
class Data(Dataset):
    def __init__(self):
        self.x = torch.arange(-1, 1, 0.1).view(-1, 1)
        self.y = torch.zeros(self.x.shape[0], 1)
        self.y[self.x[:, 0] > 0.2] = 1
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len
In [35]:
data_set = Data()
In [36]:
data_set.x
data_set.y
len(data_set)
x,y = data_set[0]
print("x = {},  y = {}".format(x,y))
x,y = data_set[1]
print("x = {},  y = {}".format(x,y))
plt.plot(data_set.x[data_set.y==0], data_set.y[data_set.y==0], 'ro', label="y=0")
plt.plot(data_set.x[data_set.y==1], data_set.y[data_set.y==1]-1, 'o', label="y=1")
plt.xlabel('x')
plt.legend()          
x = tensor([-1.]),  y = tensor([0.])
x = tensor([-0.9000]),  y = tensor([0.])
Out[36]:
<matplotlib.legend.Legend at 0x20dd8573fa0>
No description has been provided for this image
In [37]:
class logistic_regression(nn.Module):
    def __init__(self, n_inputs):
        super(logistic_regression, self).__init__()
        self.linear = nn.Linear(n_inputs, 1)
    def forward(self, x):
        yhat = torch.sigmoid(self.linear(x))
        return yhat
In [38]:
x,y = data_set[0]
len(x)
Out[38]:
1
In [39]:
model = logistic_regression(1)
In [40]:
criterion= nn.BCELoss()
In [41]:
batch_size = 10
trainloader = DataLoader(dataset=data_set, batch_size=batch_size)
In [42]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
In [43]:
get_surface = plot_error_surfaces(15, 13, data_set[:][0], data_set[:][1], 30)
model = logistic_regression(1)
criterion = nn.BCELoss()
trainloader = DataLoader(dataset=data_set, batch_size=5)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
epochs = 500
loss_values = []
for epoch in range(epochs):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        get_surface.set_para_loss(model, loss.tolist())
        loss_values.append(loss)
    if epoch % 20 == 0:
        get_surface.plot_ps()
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [44]:
w = model.state_dict()['linear.weight'].data[0]
b = model.state_dict()['linear.bias'].data[0]
print("w = ", w, "b = ", b)
w =  tensor([2.3160]) b =  tensor(-0.6895)
In [45]:
yhat = model(data_set.x)
yhat = torch.round(yhat)
correct = 0
for prediction, actual in zip(yhat, data_set.y):
    if prediction == actual:
        correct += 1
print("Accuracy: ", correct / len(data_set) * 100, "%")
Accuracy:  100.0 %
In [46]:
LOSS_BGD1 = []
for i in loss_values:
    LOSS_BGD1.append(i.item())
plt.plot(LOSS_BGD1)
plt.xlabel("Iteration")
plt.ylabel("Cost")
Out[46]:
Text(0, 0.5, 'Cost')
No description has been provided for this image

Stochastic Gradient Descent¶

In [47]:
get_surface = plot_error_surfaces(15, 13, data_set[:][0], data_set[:][1], 30)
model = logistic_regression(1)
criterion = nn.BCELoss()
trainloader = DataLoader(dataset=data_set, batch_size=1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
epochs = 100
loss_values = []
for epoch in range(epochs):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        get_surface.set_para_loss(model, loss.tolist())
        loss_values.append(loss)
    if epoch % 20 == 0:
        get_surface.plot_ps()
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [48]:
w = model.state_dict()['linear.weight'].data[0]
b = model.state_dict()['linear.bias'].data[0]
print("w = ", w, "b = ", b)
w =  tensor([2.5085]) b =  tensor(-0.6814)
In [49]:
yhat = model(data_set.x)
yhat = torch.round(yhat)
correct = 0
for prediction, actual in zip(yhat, data_set.y):
    if prediction == actual:
        correct += 1
print("Accuracy: ", correct / len(data_set) * 100, "%")
Accuracy:  100.0 %
In [50]:
LOSS_BGD1 = []
for i in loss_values:
    LOSS_BGD1.append(i.item())
plt.plot(LOSS_BGD1)
plt.xlabel("Iteration")
plt.ylabel("Cost")
Out[50]:
Text(0, 0.5, 'Cost')
No description has been provided for this image
In [51]:
get_surface = plot_error_surfaces(15, 13, data_set[:][0], data_set[:][1], 30)
model = logistic_regression(1)
criterion = nn.BCELoss()
trainloader = DataLoader(dataset=data_set, batch_size=1)
optimizer = torch.optim.SGD(model.parameters(), lr=1)
epochs = 100
loss_values = []
for epoch in range(epochs):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        get_surface.set_para_loss(model, loss.tolist())
        loss_values.append(loss)
    if epoch % 20 == 0:
        get_surface.plot_ps()
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [52]:
w = model.state_dict()['linear.weight'].data[0]
b = model.state_dict()['linear.bias'].data[0]
print("w = ", w, "b = ", b)
w =  tensor([17.3061]) b =  tensor(-4.0862)
In [53]:
yhat = model(data_set.x)
yhat = torch.round(yhat)
correct = 0
for prediction, actual in zip(yhat, data_set.y):
    if prediction == actual:
        correct += 1
print("Accuracy: ", correct / len(data_set) * 100, "%")
Accuracy:  100.0 %
In [54]:
LOSS_BGD1 = []
for i in loss_values:
    LOSS_BGD1.append(i.item())
plt.plot(LOSS_BGD1)
plt.xlabel("Iteration")
plt.ylabel("Cost")
Out[54]:
Text(0, 0.5, 'Cost')
No description has been provided for this image

Softmax¶

In [55]:
import torch 
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import matplotlib.pylab as plt
import numpy as np
In [56]:
def PlotParameters(model): 
    W = model.state_dict()['linear.weight'].data
    w_min = W.min().item()
    w_max = W.max().item()
    fig, axes = plt.subplots(2, 5)
    fig.subplots_adjust(hspace=0.01, wspace=0.1)
    for i, ax in enumerate(axes.flat):
        if i < 10:
            ax.set_xlabel("class: {0}".format(i))
            ax.imshow(W[i, :].view(28, 28), vmin=w_min, vmax=w_max, cmap='seismic')
            ax.set_xticks([])
            ax.set_yticks([])
    plt.show()
def show_data(data_sample):
    plt.imshow(data_sample[0].numpy().reshape(28, 28), cmap='gray')
    plt.title('y = ' + str(data_sample[1]))
In [57]:
train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
print("Training dataset:\n", train_dataset)

validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
print("Validation dataset:\n", validation_dataset)
Training dataset:
 Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: ToTensor()
Validation dataset:
 Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: ToTensor()
In [58]:
print("First Image and Label:")
show_data(train_dataset[0])
First Image and Label:
No description has been provided for this image
In [59]:
print("Label of 4th element:", train_dataset[3][1])
print("Image of 4th element:")
show_data(train_dataset[3])
print("Image of 3rd element:")
show_data(train_dataset[2])
Label of 4th element: 1
Image of 4th element:
Image of 3rd element:
No description has been provided for this image
In [60]:
import torch.nn as nn
class SoftMax(nn.Module):
    def __init__(self, input_size, output_size):
        super(SoftMax, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        z = self.linear(x)
        return z
In [61]:
input_dim = 28 * 28  
output_dim = 10     
In [62]:
model = SoftMax(input_dim, output_dim)
print("Model:\n", model)
print("Weight shape:", list(model.parameters())[0].size())
print("Bias shape:", list(model.parameters())[1].size())
Model:
 SoftMax(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)
Weight shape: torch.Size([10, 784])
Bias shape: torch.Size([10])
In [63]:
PlotParameters(model)
No description has been provided for this image
In [64]:
X = train_dataset[0][0]
print(X.shape)
X = X.view(-1, 28 * 28)
print(X.shape)
model(X)
torch.Size([1, 28, 28])
torch.Size([1, 784])
Out[64]:
tensor([[-0.2423, -0.4931, -0.0079, -0.2768,  0.0252,  0.0246,  0.3409, -0.0616,
          0.1147, -0.0243]], grad_fn=<AddmmBackward0>)
In [65]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=5000)
In [66]:
X = train_dataset[0][0]
X = X.view(-1, 28 * 28)
model_output = model(X)
actual = torch.tensor([train_dataset[0][1]])
print("Output: ", model_output)
print("Actual:", actual)
criterion(model_output, actual)
Output:  tensor([[-0.2423, -0.4931, -0.0079, -0.2768,  0.0252,  0.0246,  0.3409, -0.0616,
          0.1147, -0.0243]], grad_fn=<AddmmBackward0>)
Actual: tensor([5])
Out[66]:
tensor(2.2414, grad_fn=<NllLossBackward0>)
In [67]:
softmax = nn.Softmax(dim=1)
probability = softmax(model_output)
print(probability)

-1 * torch.log(probability[0][actual])
tensor([[0.0814, 0.0634, 0.1029, 0.0786, 0.1064, 0.1063, 0.1459, 0.0975, 0.1163,
         0.1012]], grad_fn=<SoftmaxBackward0>)
Out[67]:
tensor([2.2414], grad_fn=<MulBackward0>)
In [68]:
n_epochs = 10
loss_list = []
accuracy_list = []
N_test = len(validation_dataset)
def train_model(n_epochs):
    for epoch in range(n_epochs):
        for x, y in train_loader:
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
        correct = 0
        for x_test, y_test in validation_loader:
            z = model(x_test.view(-1, 28 * 28))
            _, yhat = torch.max(z.data, 1)
            correct += (yhat == y_test).sum().item()
        accuracy = correct / N_test
        loss_list.append(loss.data)
        accuracy_list.append(accuracy)
train_model(n_epochs)
In [69]:
fig, ax1 = plt.subplots()
color = 'tab:red'
ax1.plot(loss_list, color=color)
ax1.set_xlabel('epoch', color=color)
ax1.set_ylabel('total loss', color=color)
ax1.tick_params(axis='y', color=color)
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel('accuracy', color=color)
ax2.plot(accuracy_list, color=color)
ax2.tick_params(axis='y', color=color)
fig.tight_layout()
No description has been provided for this image
In [70]:
PlotParameters(model)
No description has been provided for this image
In [71]:
Softmax_fn = nn.Softmax(dim=-1)
count = 0
for x, y in validation_dataset:
    z = model(x.reshape(-1, 28 * 28))
    _, yhat = torch.max(z, 1)
    if yhat != y:
        show_data((x, y))
        plt.show()
        print("True label:", y)
        print("Predicted:", yhat)
        print("Probability of predicted class:", torch.max(Softmax_fn(z)).item())
        count += 1
    if count >= 5:
        break
No description has been provided for this image
True label: 5
Predicted: tensor([6])
Probability of predicted class: 0.9904032945632935
No description has been provided for this image
True label: 4
Predicted: tensor([6])
Probability of predicted class: 0.4621366858482361
No description has been provided for this image
True label: 3
Predicted: tensor([2])
Probability of predicted class: 0.6866523623466492
No description has been provided for this image
True label: 6
Predicted: tensor([7])
Probability of predicted class: 0.3429006040096283
No description has been provided for this image
True label: 2
Predicted: tensor([7])
Probability of predicted class: 0.5367340445518494
In [72]:
Softmax_fn = nn.Softmax(dim=-1)
count = 0
for x, y in validation_dataset:
    z = model(x.reshape(-1, 28 * 28))
    _, yhat = torch.max(z, 1)
    if yhat == y:
        show_data((x, y))
        plt.show()
        print("True label:", y)
        print("Predicted:", yhat)
        print("Probability of predicted class:", torch.max(Softmax_fn(z)).item())
        count += 1
    if count >= 5:
        break
No description has been provided for this image
True label: 7
Predicted: tensor([7])
Probability of predicted class: 0.9966927766799927
No description has been provided for this image
True label: 2
Predicted: tensor([2])
Probability of predicted class: 0.9485883712768555
No description has been provided for this image
True label: 1
Predicted: tensor([1])
Probability of predicted class: 0.9752742648124695
No description has been provided for this image
True label: 0
Predicted: tensor([0])
Probability of predicted class: 0.9995631575584412
No description has been provided for this image
True label: 4
Predicted: tensor([4])
Probability of predicted class: 0.945736825466156

Neural Network for XOR¶

Import Required Libraries¶

In [73]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from torch.utils.data import Dataset, DataLoader

Define Helper Functions¶

In [74]:
# Function to plot decision regions
def plot_decision_regions_2class(model, data_set):
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#00AAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#00AAFF'])
    X = data_set.x.numpy()
    y = data_set.y.numpy()
    h = .02
    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    XX = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])
    yhat = np.logical_not((model(XX)[:, 0] > 0.5).numpy()).reshape(xx.shape)
    plt.pcolormesh(xx, yy, yhat, cmap=cmap_light, shading='auto')
    plt.plot(X[y[:, 0] == 0, 0], X[y[:, 0] == 0, 1], 'o', label='y=0')
    plt.plot(X[y[:, 0] == 1, 0], X[y[:, 0] == 1, 1], 'ro', label='y=1')
    plt.title("Decision Region")
    plt.legend()
# Function to calculate accuracy
def accuracy(model, data_set):
    return np.mean(data_set.y.view(-1).numpy() == (model(data_set.x)[:, 0] > 0.5).numpy())

Creating Noisy XOR dataset¶

In [75]:
class XOR_Data(Dataset):
    def __init__(self, N_s=100):
        self.x = torch.zeros((N_s, 2))
        self.y = torch.zeros((N_s, 1))
        for i in range(N_s // 4):
            self.x[i, :] = torch.Tensor([0.0, 0.0])
            self.y[i, 0] = torch.Tensor([0.0])
            self.x[i + N_s // 4, :] = torch.Tensor([0.0, 1.0])
            self.y[i + N_s // 4, 0] = torch.Tensor([1.0])
            self.x[i + N_s // 2, :] = torch.Tensor([1.0, 0.0])
            self.y[i + N_s // 2, 0] = torch.Tensor([1.0])
            self.x[i + 3 * N_s // 4, :] = torch.Tensor([1.0, 1.0])
            self.y[i + 3 * N_s // 4, 0] = torch.Tensor([0.0])
        self.x = self.x + 0.01 * torch.randn((N_s, 2))
        self.len = N_s
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len
    def plot_stuff(self):
        plt.plot(self.x[self.y[:, 0] == 0, 0].numpy(), self.x[self.y[:, 0] == 0, 1].numpy(), 'o', label="y=0")
        plt.plot(self.x[self.y[:, 0] == 1, 0].numpy(), self.x[self.y[:, 0] == 1, 1].numpy(), 'ro', label="y=1")
        plt.legend()
data_set = XOR_Data()
data_set.plot_stuff()
No description has been provided for this image

Define the simple neural network¶

In [76]:
class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        return x

Training function¶

In [77]:
def train(data_set, model, criterion, train_loader, optimizer, epochs=5):
    COST = []
    ACC = []
    for epoch in range(epochs):
        total = 0
        for x, y in train_loader:
            optimizer.zero_grad()
            yhat = model(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()
            total += loss.item()
        ACC.append(accuracy(model, data_set))
        COST.append(total)
    fig, ax1 = plt.subplots()
    color = 'tab:red'
    ax1.plot(COST, color=color)
    ax1.set_xlabel('Epoch', color=color)
    ax1.set_ylabel('Total Loss', color=color)
    ax1.tick_params(axis='y', color=color)
    ax2 = ax1.twinx()
    color = 'tab:blue'
    ax2.set_ylabel('Accuracy', color=color)
    ax2.plot(ACC, color=color)
    ax2.tick_params(axis='y', color=color)
    fig.tight_layout()
    plt.show()
    return COST

Using one neuron¶

In [78]:
model = Net(D_in=2, H=1, D_out=1)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
train_loader = DataLoader(dataset=data_set, batch_size=1)
LOSS1 = train(data_set, model, criterion, train_loader, optimizer, epochs=500)
plot_decision_regions_2class(model, data_set)
No description has been provided for this image
No description has been provided for this image

Using 2 neurons¶

In [79]:
model = Net(D_in=2, H=2, D_out=1)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
train_loader = DataLoader(dataset=data_set, batch_size=1)
LOSS1 = train(data_set, model, criterion, train_loader, optimizer, epochs=500)
plot_decision_regions_2class(model, data_set)
No description has been provided for this image
No description has been provided for this image

Using 3 neurons¶

In [80]:
model = Net(D_in=2, H=3, D_out=1)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
train_loader = DataLoader(dataset=data_set, batch_size=1)
LOSS1 = train(data_set, model, criterion, train_loader, optimizer, epochs=500)
plot_decision_regions_2class(model, data_set)
No description has been provided for this image
No description has been provided for this image

Training a Neural Network with Momentum¶

In [81]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
In [82]:
class XOR_Data:
    def __init__(self, n_samples=1000):
        self.X = np.random.randint(0, 2, (n_samples, 2))
        self.y = np.logical_xor(self.X[:, 0], self.X[:, 1]).astype(int)
        self.X = self.X + 0.2 * np.random.randn(*self.X.shape)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=42
        )
    def get_train(self):
        return torch.tensor(self.X_train, dtype=torch.float32), torch.tensor(self.y_train, dtype=torch.float32)
    def get_test(self):
        return torch.tensor(self.X_test, dtype=torch.float32), torch.tensor(self.y_test, dtype=torch.float32)
In [83]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(2, 2)  
        self.fc2 = nn.Linear(2, 1)  
    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x
In [84]:
def train_model(momentum=0.0):
    model = Net()
    criterion = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=momentum)
    X_train, y_train = data.get_train()
    X_test, y_test = data.get_test()
    loss_history = []
    acc_history = []
    for epoch in range(500):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs.squeeze(), y_train)
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            y_pred = model(X_test).squeeze().round()
            acc = accuracy_score(y_test.numpy(), y_pred.numpy())
        loss_history.append(loss.item())
        acc_history.append(acc)
    print(f"Final Accuracy with momentum={momentum}: {acc_history[-1]*100:.2f}%")
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.title(f"Loss Curve (Momentum={momentum})")
    plt.plot(loss_history)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.subplot(1, 2, 2)
    plt.title(f"Accuracy Curve (Momentum={momentum})")
    plt.plot(acc_history)
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.show()
    return model
In [85]:
print("Training with Momentum = 0.0")
data = XOR_Data()  
train_model(momentum=0.0)
Training with Momentum = 0.0
Final Accuracy with momentum=0.0: 44.00%
No description has been provided for this image
Out[85]:
Net(
  (fc1): Linear(in_features=2, out_features=2, bias=True)
  (fc2): Linear(in_features=2, out_features=1, bias=True)
)
In [86]:
print("Training with Momentum = 0.5")
data = XOR_Data()  
train_model(momentum=0.5)
Training with Momentum = 0.5
Final Accuracy with momentum=0.5: 46.50%
No description has been provided for this image
Out[86]:
Net(
  (fc1): Linear(in_features=2, out_features=2, bias=True)
  (fc2): Linear(in_features=2, out_features=1, bias=True)
)
In [87]:
print("Training with Momentum = 0.9")
data = XOR_Data()
train_model(momentum=0.9)
Training with Momentum = 0.9
Final Accuracy with momentum=0.9: 88.00%
No description has been provided for this image
Out[87]:
Net(
  (fc1): Linear(in_features=2, out_features=2, bias=True)
  (fc2): Linear(in_features=2, out_features=1, bias=True)
)

Sigmoid vs ReLU activation¶

In [88]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import torch.nn.functional as F
import matplotlib.pylab as plt
import numpy as np
torch.manual_seed(2)
Out[88]:
<torch._C.Generator at 0x20dce7ef110>

Net using Sigmoid activation and NetRele using RELU activation¶

In [89]:
class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        x = self.linear3(x)
        return x
In [90]:
class NetRelu(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(NetRelu, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
    def forward(self, x):
        x = torch.relu(self.linear1(x))
        x = torch.relu(self.linear2(x))
        x = self.linear3(x)
        return x
In [91]:
def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}  

    for epoch in range(epochs):
        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
            useful_stuff['training_loss'].append(loss.data.item())

        correct = 0
        for x, y in validation_loader:
            z = model(x.view(-1, 28 * 28))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()

        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)

    return useful_stuff
In [92]:
train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
In [93]:
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
In [94]:
input_dim = 28 * 28
hidden_dim1 = 50
hidden_dim2 = 50
output_dim = 10
cust_epochs = 10
criterion = nn.CrossEntropyLoss()
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000, shuffle=True)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=5000, shuffle=False)

Train Sigmoid Network¶

In [95]:
model = Net(input_dim, hidden_dim1, hidden_dim2, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=cust_epochs)

Train ReLU Network¶

In [96]:
modelRelu = NetRelu(input_dim, hidden_dim1, hidden_dim2, output_dim)
optimizer = torch.optim.SGD(modelRelu.parameters(), lr=0.01)
training_results_relu = train(modelRelu, criterion, train_loader, validation_loader, optimizer, epochs=cust_epochs)
In [97]:
plt.plot(training_results['training_loss'], label='sigmoid')
plt.plot(training_results_relu['training_loss'], label='relu')
plt.ylabel('loss')
plt.title('training loss iterations')
plt.legend()
Out[97]:
<matplotlib.legend.Legend at 0x20de5f7c6a0>
No description has been provided for this image
In [98]:
plt.plot(training_results['validation_accuracy'], label = 'sigmoid')
plt.plot(training_results_relu['validation_accuracy'], label = 'relu') 
plt.ylabel('validation accuracy')
plt.xlabel('Iteration')   
plt.legend()
Out[98]:
<matplotlib.legend.Legend at 0x20de5da0bb0>
No description has been provided for this image

Object Detection¶

Using Haar Cascade¶

In [99]:
import urllib.request
import cv2
from matplotlib import pyplot as plt
%matplotlib inline
print(cv2.__version__)  
4.12.0
In [100]:
def plt_show(image, title="", gray=False, size=(12, 10)):
    from pylab import rcParams
    temp = image
    if gray == False:
        temp = cv2.cvtColor(temp, cv2.COLOR_BGR2GRAY)   
    rcParams['figure.figsize'] = [10, 10]
    plt.axis("off")
    plt.title(title)
    plt.imshow(temp, cmap='gray')
    plt.show()
In [101]:
def detect_obj(image):
    plt_show(image)
    object_list = detector.detectMultiScale(image)
    print(object_list)    
    for obj in object_list:
        (x, y, w, h) = obj
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2)    
    plt_show(image)
In [102]:
haarcascade_url = 'https://raw.githubusercontent.com/andrewssobral/vehicle_detection_haarcascades/master/cars.xml'
haar_name = "cars.xml"
urllib.request.urlretrieve(haarcascade_url, haar_name)
detector = cv2.CascadeClassifier(haar_name)
In [103]:
image_url = "https://s3.us.cloud-object-storage.appdomain.cloud/cf-courses-data/CognitiveClass/CV0101/Dataset/car-road-behind.jpg"
image_name = "car-road-behind.jpg"
urllib.request.urlretrieve(image_url, image_name)
image = cv2.imread(image_name)
plt_show(image)
No description has been provided for this image
In [104]:
detect_obj(image)
No description has been provided for this image
()
No description has been provided for this image